{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Improving Predictions with `scikit-learn`\n",
    "\n",
    "In this chapter we will try the same regression as chapter 8, but this time without departure delay; a harder problem."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Imports loaded...\n"
     ]
    }
   ],
   "source": [
    "import sys, os, re\n",
    "sys.path.append(\"lib\")\n",
    "import utils\n",
    "\n",
    "import numpy as np\n",
    "import sklearn\n",
    "import iso8601\n",
    "import datetime\n",
    "print(\"Imports loaded...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original JSON file size: 1,676,709,758 Bytes\n",
      "Training items: 5,714,008\n",
      "Data loaded...\n"
     ]
    }
   ],
   "source": [
    "# Load and check the size of our training data. May take a minute.\n",
    "print(\"Original JSON file size: {:,} Bytes\".format(os.path.getsize(\"../data/simple_flight_delay_features.jsonl\")))\n",
    "training_data = utils.read_json_lines_file('../data/simple_flight_delay_features.jsonl')\n",
    "print(\"Training items: {:,}\".format(len(training_data))) # 5,714,008\n",
    "print(\"Data loaded...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Size of training data in RAM: 50,897,424 Bytes\n",
      "{'Origin': 'ABQ', 'Dest': 'DFW', 'FlightDate': '2014-12-31T16:00:00.000-08:00', 'DayOfWeek': 4, 'DayOfMonth': 1, 'DayOfYear': 1, 'DepDelay': 14.0, 'FlightNum': '1024', 'CRSDepTime': '2015-01-01T07:30:00.000-08:00', 'Distance': 569.0, 'CRSArrTime': '2015-01-01T10:10:00.000-08:00', 'Carrier': 'AA', 'ArrDelay': 13.0}\n"
     ]
    }
   ],
   "source": [
    "# Inspect a record before we alter them\n",
    "print(\"Size of training data in RAM: {:,} Bytes\".format(sys.getsizeof(training_data))) # 50MB\n",
    "print(training_data[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sampled items: 1,000,000 Bytes\n",
      "Data sampled...\n"
     ]
    }
   ],
   "source": [
    "# We need to sample our data to fit into RAM\n",
    "training_data = np.random.choice(training_data, 1000000) # 'Sample down to 1MM examples'\n",
    "print(\"Sampled items: {:,} Bytes\".format(len(training_data)))\n",
    "print(\"Data sampled...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Results vectorized size: 8,000,096\n",
      "Results vectorized...\n"
     ]
    }
   ],
   "source": [
    "# Separate our results from the rest of the data, vectorize and size up\n",
    "results = [record['ArrDelay'] for record in training_data]\n",
    "results_vector = np.array(results)\n",
    "print(\"Results vectorized size: {:,}\".format(sys.getsizeof(results_vector))) # 45,712,160 bytes\n",
    "print(\"Results vectorized...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ArrDelay, DepDelay and FlightDate removed from training data...\n"
     ]
    }
   ],
   "source": [
    "# Remove the two delay fields and the flight date from our training data\n",
    "for item in training_data:\n",
    "  item.pop('ArrDelay', None)\n",
    "  item.pop('FlightDate', None)\n",
    "  item.pop('DepDelay', None)\n",
    "print(\"ArrDelay, DepDelay and FlightDate removed from training data...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CRSArr/DepTime converted to unix time...\n"
     ]
    }
   ],
   "source": [
    "# Must convert datetime strings to unix times\n",
    "for item in training_data:\n",
    "  if isinstance(item['CRSArrTime'], str):\n",
    "    dt = iso8601.parse_date(item['CRSArrTime'])\n",
    "    unix_time = int(dt.timestamp())\n",
    "    item['CRSArrTime'] = unix_time\n",
    "  if isinstance(item['CRSDepTime'], str):\n",
    "    dt = iso8601.parse_date(item['CRSDepTime'])\n",
    "    unix_time = int(dt.timestamp())\n",
    "    item['CRSDepTime'] = unix_time\n",
    "print(\"CRSArr/DepTime converted to unix time...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sampled dimensions: [1,000,000]\n",
      "Size of DictVectorized vectors: 80,000,000 Bytes\n",
      "Training data vectorized...\n"
     ]
    }
   ],
   "source": [
    "# Use DictVectorizer to convert feature dicts to vectors\n",
    "from sklearn.feature_extraction import DictVectorizer\n",
    "\n",
    "print(\"Sampled dimensions: [{:,}]\".format(len(training_data)))\n",
    "vectorizer = DictVectorizer()\n",
    "training_vectors = vectorizer.fit_transform(training_data)\n",
    "print(\"Size of DictVectorized vectors: {:,} Bytes\".format(training_vectors.data.nbytes))\n",
    "print(\"Training data vectorized...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(900000, 7434) (100000, 7434)\n",
      "(900000,) (100000,)\n",
      "Test train split performed again...\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "# Redo test/train split\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "  training_vectors,\n",
    "  results_vector,\n",
    "  test_size=0.1,\n",
    "  random_state=17\n",
    ")\n",
    "print(X_train.shape, X_test.shape)\n",
    "print(y_train.shape, y_test.shape)\n",
    "print(\"Test train split performed again...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Gradient boosting regressor instantiated...!\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import GradientBoostingRegressor\n",
    "\n",
    "regressor = GradientBoostingRegressor()\n",
    "print(\"Gradient boosting regressor instantiated...!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Regressor fitted again...\n"
     ]
    }
   ],
   "source": [
    "# Refit regression on new training data\n",
    "regressor.fit(X_train, y_train)\n",
    "print(\"Regressor fitted again...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predictions made for X_test again...\n"
     ]
    }
   ],
   "source": [
    "# Predict using the test data again\n",
    "predicted = regressor.predict(X_test.toarray())\n",
    "print(\"Predictions made for X_test again...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Median absolute error:    14\n",
      "r2 score:                 0.0343\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import median_absolute_error, r2_score\n",
    "\n",
    "# Get the median absolute error again\n",
    "medae = median_absolute_error(y_test, predicted)\n",
    "print(\"Median absolute error:    {:.3g}\".format(medae))\n",
    "\n",
    "# Get the r2 score gain\n",
    "r2 = r2_score(y_test, predicted)\n",
    "print(\"r2 score:                 {:.3g}\".format(r2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeQAAAFKCAYAAADMuCxnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAGSNJREFUeJzt3V1sZOddB+C/PRuppRBiHFQu+Gi0tldZkCjelN3ALiWL\nU29WSKEoAbL0AhAgFbESlyAllWgRSHCRGyCCABVS6VIhuOAitTfWQpsoCQm7LRclZNYuhTtE1htS\nBQrCe7g4HeyxZ+ac+X7nnOeRjrI+czzzjmOf37zfc1mWBQAwXfPTLgAAIJABIAkCGQASIJABIAEC\nGQASIJABIAECGQASIJABIAHHylw0Nze3GBHrEfGViPjaOAsEABXzroh4X0RsZll2q9tFpQI58jD+\nsxEUCgDq6qcj4tPdHiwbyF+JiPjUpz4V999//wjKBAD18Prrr8dHPvKRiK9naTdlA/lrERH3339/\nrK6uDlcyAKinnl2+BnUBQAIEMgAkQCADQAIEMgAkQCADQAIEMgAkQCADQAIEMgAkQCADQAIEMgAk\nQCADQAIEMgAkQCADQAIEMgAkQCADQAIEMgAkQCADQAIEMgAkQCADQAIEMgAkQCADQAIEMgAkQCAD\nQAIEMgAkQCADQAIEMgAkQCADQAIEMgAkQCADQAIEMgAkQCADQAIEMgAkQCADQAIEMgAkQCADQAIE\nMgAkQCADQAIEMgAkQCADQAIEMgAkQCADQAIEMgAkQCADQAKOTbsAANBLsxmxsxOxtBSxvDzt0oyP\nGjIASdrdjbhwIeLEiYiLFyNWVvKvb9+edsnGQyADkKRLlyK2ttrPbW1FPPHEdMozbgIZgOQ0mxGb\nmxF7e+3n9/by8zdvTqdc4ySQAUjOzk7vx7e3J1OOSRLIACTn+PHejy8tTaYckySQAUjOykrE+npE\no9F+vtHIz1dxtLVABiBJV65ErK21n1tby89XkXnIACRpYSFiYyMfwLW9Xf15yAIZgKQtL1c7iFs0\nWQNAAgQyACRAIANAAgQyACRAIANAAgQyACRAIANAAgQyACRAIANAAgQyACRAIANAAgQyACRAIANA\nAgQyACRAIANAAgQyACRAIANAAgQyACRAIANAAgQyACRAIANAAgQyACRAIANAAgQyACRAIANAAgQy\nACRAIANAAgQyACRAIANAAgQyACRAIANAAgQyACRAIANAAo5NuwDT1mxG7OxELC1FLC9PuzQA1FVt\na8i7uxEXLkScOBFx8WLEykr+9e3b0y4ZAHVU20C+dClia6v93NZWxBNPTKc8ANRbLQO52YzY3IzY\n22s/v7eXn795czrlAqC+ahnIOzu9H9/enkw5AKClloF8/Hjvx5eWJlMOAGipZSCvrESsr0c0Gu3n\nG438vNHWAExaLQM5IuLKlYi1tfZza2v5eQCYtNrOQ15YiNjYyAdwbW+bhwzAdNU2kFuWlwUxANNX\n2yZrAEiJQAaABAhkAEiAQAaABAhkAEiAQAaABAhkAEiAQAaABAhkAEiAQAaABAhkAEiAQAaABAhk\nAEiAQAaABAhkAEiAQAaABAhkAEiAQAaABAhkAEiAQAaABAhkAEiAQAaABAhkAEiAQAaABAhkAEiA\nQAaABAhkAEiAQAaABAhkAEiAQAaABAhkAEjAsWkXYJY0mxE7OxFLSxHLy9MuDQBVooZcwu5uxIUL\nESdORFy8GLGykn99+/a0SwZAVQjkEi5ditjaaj+3tRXxxBPTKQ8A1SOQCzSbEZubEXt77ef39vLz\nN29Op1wAVItALrCz0/vx7e3JlAOAahPIBY4f7/340tJkygFAtQnkAisrEevrEY1G+/lGIz9vtDUA\noyCQS7hyJWJtrf3c2lp+HgBGwTzkEhYWIjY28gFc29vmIQMwegK5D8vLghiA8dBkDQAJEMgAkACB\nDAAJEMgAkACBDAAJEMgAkADTngBmmH3aq0MNGWAG2ae9egQywAyyT3v1CGSAGWOf9moSyAAzxj7t\n1SSQAWaMfdqrSSADzBj7tFeTQAaYQfZprx7zkAFmkH3aq0cgA8ww+7RXhyZrAEiAQAaABAhkAEiA\nQAaABAhkAEiAQAaABAhkAEiAQAaABAhkAEiAQAaABAhkAEiAQAaABAhkAEiAQAaABAhkAEiAQAaA\nBAhkAEiAQAaABAhkAEiAQAaABAhkAEjAsWkXYBY0mxE7OxFLSxHLy9MuDQBVpIbcw+5uxIULESdO\nRFy8GLGykn99+/a0SwZA1QjkHi5ditjaaj+3tRXxxBPTKQ8A1SWQI2+S/uxnI27ebD+3uRmxt9d+\n7d5efv7gtQAwrFoHcq8m6Z2d3t+7vT2ZMkKRTh8ogdlT60Du1SR9/Hjv711aGl+5oAxjHKBaahvI\nRU3Sc3MR6+sRjUb7441Gft5oa6bNGAeoltoGcpkm6StXItbW2s8/+GDEz/2c5kGmyxgHqJ5aBvLu\nbsRv/mbva5aWIhYWIjY28pvfZz4Tce5cxIsvRvzkT2oeZLqMcYDqqWUgX7oU8fLLva+5fHk/bJeX\nI/7kTyJeeqn9Gs2DTIsxDlA9tQvkbk19hx0MW82DpGZlxRgHqJraBXJRU1/LwbDVPEiKOo1xWFvL\nzwOzp3ZrWRc19R22va15kDS1xjjcvJn/nlprHWZb7WrIKysRDz1U/vqlJc2DpG15OeKRR/wewqyr\nXSBH5HOM5+Z6X3M4bDUPAjBOtWuybjYjrl0rvu5w2PbbPGjLRgD6UbtALhqgNTcX8YM/mIdvJ8vL\nvQN2dzefVrW5uX9ufT0P94WF/ssLQD3Ursm6aIBWluWLfww6lclyhgAMonaB3BqgNV/wzgeZymS+\nMgCDql0gR+TNxz/wA72vGWQqk/nKAAyqloG8sBDxwgsRZ88erSkPM5XJfGUABlXLQG7567+OePjh\n9nPDTGUyXxmAQdVulPVB41jp6MqVfADXwVHW5isDUKTWgdzy5S9HvPZaxLFjwwey5QwBGEStA3ln\nJ+L06Yhbt/bPLS7m4XzffcM9d9F8ZQA4qNZ9yIfDOCL/+gMfmE55AKiv2gby5ubRMG65dSvi+ecn\nWx4A6q22gfx3f9f78Zdfnkw5ACCixoF8+nTvxxsNK2sBMDm1DeT19XwAVzdPPpnPK75wIeL27f3z\nzWbEZz87/bBOpRwAjEZtAzkiH03dK5Qj9jeG2N3Nw/nEiYiLFzuHdcs4w7KfcgAwO2odyPfdF/Hm\nmxFXr0Zcvtz5mtbGED/2Y8W7OE0iLO0mBVBNtQ7klocfjnjkkd7XvPBC8S5O4w5Lu0kBVJdA/rqi\njSF62d4uDstRTKOymxRAdQnkr7v33s79yfPz+a5Qvdy4EfH5z/e+5kMfGr752m5SANVV20A+PPDq\n0qWIt946et03fmPE00933sWp5cknI37hF4pfc9jma7tJAVRX7QK508Crc+c6NzdHRLz9dr6U5n/8\nR8T3fM9wr92rr7fsyOwrV/Ldow6ymxTA7Kvd5hKdBl699FLx973yyujKsL2d12abzYgvfjHi4x+P\n+NKX9h9fX88DdmHh6PfaTQqgmmoVyK2BV4fduTPa1/mpn4r48z/v/vi99+a19E5lidhv2t7Y6P4c\ndpMCqJZaNVkXjVIelW5hPDeX136feqr3qOvDTdtW5QKovloF8jBTm0YhyyJ++IfzsC1TK//CF3ov\nNDKNoPbhAGA8atVk3Rql3K2peBJ+7dfKX/vzPx/xn//Zfm5rK+KxxyLuuqv9ffTqdy6j2cxbELr1\nSe/u5v3vo3xNAPbVqoYckYfcrPjqVzsvNHLt2tEm7+efz0db91tzLbvcpyU7AcarNoHcCp7HH592\nSUbjcJP3nTv5AiX9rp9dJmgt2QkwfrUJ5Mcem25T9SSVrbmWDVpLdgKMXy36kJvNiL/5m2mXYjTm\n54sHhLUC9S//MuIbviFfyWtv72j/cFHQ/u3f5mHbbYWylqWl4j7ow/q9HurE30c9VT6Qd3cjfvRH\np12K0Xn3uyO+/dvz2mtRMD/22NFzBwdiFY06/8Vf3P/34mK+tOjB2nSjEfHBD+ZbV5Yd7GVwGHTn\n76PeKt9kfelStfo433kn4o03Bl/M5GBzdre1sefmjn7f7dsR99zTfq61hOfhPuheA8wMDoPu/H3U\nW6VryK++Wv1+43e/O+K//qv89a3m7N/4jYhv+7b8v++8E/Hii/vXZNnR77tzJ+LWrYhPfjLive/N\nQ/xf/qW9Fn3w2tYAs9XViD/4g4gHHui+UtrBPmvNc9SVvw8qHcidwqJq+gnjg556qvP5kycj/vEf\nu3/fz/5sxDd/c77ZRhk3buSbc5w9W7yNZWuNb6ijMoMn/X1UW2UDudmM+Id/mHYpZs8//VPxNWXD\n+KAXX2yvhXdyeD/nooEt4xr4YkAN02C/cyrbh/y5z027BLNp1BttlHF4P+eixUrKLmbSr3E9L5Rh\nv3MqG8jMjnvuiXjmmfzfzWbEww/3HtgyroEvBtTUU0rrs9vvvN4q22T9wQ9OuwSU9dZb+ZKmh9fn\nPqg1sOXq1fEMfDGgpn5SnGJkv/N6q2wNeWUl4ju+Y9qloIxu63N38sorvR8fdNUwq5HVT8otIsvL\nEY88IozrprKBvLubH8yOMv3XJ0/2fnzQgS8G1NSL9dlJUWUD+dFH8/m1VENrYMsf/VHnx+fm8sez\nbLD+QANq6kWLCCmqZCBvbhZPsWE87rprPM+7thbxiU9072POsnw61jAjpHsNqElp4A/D0yJCiioV\nyK1pKxcuTLsk9XTXXfnqaM1mxHPPDb9K2vx8vtJXs5kPdHnzzd7Xv/pq+9f99ge2BtS0yt9sRnz6\n0/lzmApVLVpESFGlArnTIA0m586diF/91fxmdvp0XqPtpGj3qJbW9KfWzbGoVnO4D3rQ/sCDA2pS\nHvjDcEwxIjWVCeRugzSYnFYAvvZaXgPp1m3wnvcM9vzdajXzBb/Fg/YHGvhTbZ1aRDY27Ko0Srp6\n+lOZQC4apMHk/MzP5BtRdPP22+Wep1NN9Pd//+iuU3ff3ft5Bu0PHOXAHzemdJliNHpWvRtMZQK5\nqDmTyem1OUU/OtVEf+mX8oVEDvrqV/P9mkfdHziKgT/TujFV9QNAVd9X1ejqGUxlAnllJeKhh6Zd\nCsahVRPd3OzehHzrVsSDD7aff/DB/vsDD97wRzHwZ9I3pqrWTKr6vqpIV8/gKhPIu7t2d6qqd96J\nOHWqePT85csR587tf/3ii3nwlblpd7vhP/PM4AN/pnFjqmrNpKrvq4rM8R5cZQL50UetzFVFd98d\n8fjj+b7KRX73dyNeeqn9XNmbdrcb/kc/OvjAn0nfmKpaM6nq+6oqc7wHV4lAbjYtBFJVZQeARUS8\n8ELxTbtTH2SZG/4gA38WF3s/PuobU1VrJmXfl/7lNJjjPbhKBLIR1hT5whfy5uxOfZCDBllRAHzs\nY53Pt5b5HPWNaZw1k2mGXdH7uvde/cupMcd7QFmWFR4RsRoR2fXr17MUvfFGluWLJzocnY9G4+i5\n+fksO306y06e7P29zWb779utW1m2vt5+zdmzWfb001n27LP59UW/k6+9Np6/hfX1o++10cjPD6LT\ne11fz7JXX82y5547+rM57I03yl1XpNf7GvV7ZnSazdH8/591169fzyIii4jVLOuRtb0e/P+LEg/k\nLMtviNO+6TuqdTQa+e/V4RtKpwA4fHzv9/Z+/Nd/fTw3qd3dzgG6uzvY83V6r3Nzxc/fLcgHLUe3\n9/Xqq71/znUPAtJQu0D+zGemfwN3VOu45572r9fXs+wv/mK0rzFMSPWqfY6iZlK25alTbXRctdbD\n7+u553qX7bnnhnu9bkZV86ceygZyJfqQIwzkYPQODyi7ejUf8T1KW1t531o/v79l5uQeHITWbSBb\nUZ/wF79YrjydBs6NY1R0s5n35y8tlV/ffNQD54aZD130MzcobTRm+ufYK61bRyRcQ+7UNOZwzOJx\nsLbcqQbWOnf2bLnaZ6e/jYceyrLz59vPra6292m3XufUqf7K36qNjrrWWtT8Pck+5EFeq6j8o27e\nr6uUf461abJ+6KHp30gd1ToO95FO6mg08t/nMiHa7WgNKsuy7n3d3d5fP6/T6Wi9blFTd7/NvEUh\nOOp+824GfV9F5TcobTRS/jnWIpCNrnaM45j2AMF+QnTU72GYDyPjqLX2E4LjHtE7SM2/qPybm+Xf\nH92N+kPgqNWiD/lzn5t2CaiaM2fyBUY6LWwwKZ22EM2y/p7j8IplZfX7OgddvRrx2GP5v0c1D7Wf\nOeLj3rWpbH/1wT7MovK/8krvx2d1MZdJq8qiOMemXYBh/Nu/TbsEVM0rr+Q30itX8iU3NzenXaLB\n3Lkzvuc+ebLzjl5ZFnHt2v7KZhsb+b8PD8TqR0rLMLZWoNraav/Q1GjkHzYWF/MBXgd/Z86e7f2c\nZ870fvxbv3Xw8tZJSr8nw5jJGnJrpONTT027JFTR9nb75vWXL0/mdefH8Nc4juf88Id7P36w5WrY\nWmtqyzD2qvl3Wg/95Zd7bw36oQ/l/52b6/x6Tz45urJX2cpKxPnzR3+Oc3P5+VlZrnMmA7nTLz6M\nysc/vj+NZXk54pd/eTKv+/DD+c2jU1P54RtNo5EvBfqHf9j7OUdZU26FyHd91+ies4yyzd+TmO5y\n8IPawc1G/v3f+9sa9GD5P/GJ7l0FNs/oz+Gf4zBdMFPRq4O5dURCg7oM5HJM4lhdbR8IMszo435e\nq9OI4fPnj75+0QCqiP4HaJ0/n2Xf8i3dH2+95rQG0HQbtJXCdJcyA766lb/oe1dX05i6k7KqDOrq\n+kDbRQkFctEvr8MxyqN1Yy9aonHQ44EHOt9sO928u93QO4V42ePkyf3n7Oem1u0Dyvnz4/m77yWF\n6S7DBELR96YydSdl01qxrazKBrIasmOSR+tmOK4Pgs8+O7q/jVZgP/ts+dc/uCBIPze1Sc39LZJS\nzWiYDwbr6/lmJ6m8l1mT0u9BJ7WY9gTj1lrucVxToEY5U6A1gOqHfqjc9efPRzzwwP7X/YxU7daX\nurDQf7mHkdJ0l2Gmel25EvH+9/e+Zlam7kxDaoP/BjVzgWzvY0ZlcbH8tXt7vf/gm83iAVadPPnk\n6Pfu7XZzOqjTqN5BbmrjnvtbJKXpLsN8SFlYKA7uWZm6My2V2IO5V/W5dYQma0cixzd9U/vXi4vd\nrz13Lt8FrNncb869enW/z3R3t/xgrdb1Rc20ZZoeDx/j6CMs269cpj86lfWAu0mhD3lUqvRepiXF\nPZgr24ecZYPd9BxpHO9/f5Y9/njxdb1GFh/+gzv49SB/jM1m3u/63d9d7mZ48DUObwLRKdDOnCkX\n/OO4gRT1J3cb7JLiTa2bWfwQ0U2V3gv7Kh3Iw4wqdUz++OhH2zc92Njoff0nP7n//3qSwdDPzbBo\nqk2ncg8ajsNIfbDLKM3Sh4giVXovlA/kuSzLCpu15+bmViPi+vXr12N1dXUMDeeDuXYt4kd+ZNql\nqJ6TJyP+9E/z/s3DywTOz+eDTy5fjvjXf434zu+MeO97Iz72sYgbN9oXopifzxe72Ng4+hr33psv\nmHDY4mLEm2+O/j31o8xyjxcudF9CsdP7jcj7FE+c6P66zeZ4+mIHKSswOjdu3IhTp05FRJzKsuxG\n1wt7pXXriMRqyAf91m9NvwY460ejkfe3Hv5E3k+Nsd+mti9/+Wj/7+Jifj51w9Q6p9FHqBkUpqts\nDXmmN5eIiPif/5l2CdI2N5ffglsajYh77mmvnbZGIh4eDdoaNVqmxtjPtRER992X14Sffz5f7/fB\nB/Pa9CwoM9Wm23vvtGnFuEeC9vv/BpiOmQ/k06enXYLhnDkTsboa8cd/HPHf/939uvn5iL/6q4hn\nnmm/mS8udm76jcjnmUbkTfstrZv/m2+WvzkvL5e/gfdzbUQewrMSxC3DTLWZZjj2+/8GmKyZD+T1\n9bzG99Zb0y5Jf1r9tK2FGX7v9/La4rVr+X+vX9+/9tSp/NzCQsSjjx69mR/8OuLojb7TzX9hwc15\nUEXb8JX5uQpH4LCZHtTV8s//nN/cOm3sPi7veU/E+94X8aUvHX3szJneG49fvVpcK9S8mLbbt482\nPa+vd276B+qt7KCuma8hR+T9kS+/HPH93z/+1zp+POK3fzvix388/7oVnMeORfzv/+4HaK+RrWWa\naNWg0qZfFhi1SgRyRMQHPpDXUJ5/vvsesHffHfH22/097/HjEb/yK/l/O910uwXnNAbvMHk+OAGj\nUplAjugcgufO5RvMf9/3tfe3HjuWNzv+zu9E/P3f719/9mzE00/nG44PU+tRgwKgH5UK5DIheLhG\n8xM/Md7QVIMCoIxKBXJLvyEoNAGYtpnbfhEAqkggA0ACBDIAJEAgA0ACBDIAJEAgA0ACBDIAJEAg\nA0ACBDIAJEAgA0ACBDIAJEAgA0ACBDIAJEAgA0ACBDIAJEAgA0ACBDIAJEAgA0ACBDIAJEAgA0AC\nBDIAJEAgA0ACBDIAJEAgA0ACBDIAJEAgA0ACBDIAJEAgA0ACBDIAJEAgA0ACBDIAJEAgA0ACBDIA\nJEAgA0ACjpW87l0REa+//voYiwIA1XMgO9/V67q5LMsKn2xubu5SRPzZ8MUCgNr66SzLPt3twbKB\nvBgR6xHxlYj42siKBgDV966IeF9EbGZZdqvbRaUCGQAYL4O6ACABAhkAEiCQASABAhkAEiCQASAB\nAhkAEiCQASAB/wfsASwywMDCfgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x2be6f1320>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot outputs, compare actual vs predicted values\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "plt.scatter(\n",
    "  y_test,\n",
    "  predicted,\n",
    "  color='blue',\n",
    "  linewidth=1\n",
    ")\n",
    "\n",
    "plt.xticks(())\n",
    "plt.yticks(())\n",
    "\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3.0
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}